# Suppress all warnings, if any are generated
import warnings
warnings.filterwarnings("ignore")
#import necessary python packages for single-cell RNA SEQ analysis
import scanpy as sc #software suite of tools for single-cell analysis in python
import besca as bc #internal BEDA package for single cell analysis
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import scipy
import anndata as ad
from scipy.sparse import csr_matrix
import scanpy.external as sce
from harmony import harmonize
import umap.umap_ as umap
from scipy import io
print(ad.__version__)
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
# gives error!! sc.logging.print_versions()
INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /tmp/tmp2ghhn8aa INFO:torch.distributed.nn.jit.instantiator:Writing /tmp/tmp2ghhn8aa/_remote_module_non_scriptable.py INFO:lightning_fabric.utilities.seed:Global seed set to 0
0.9.1
# Read the last saved annotated data object, stored in h5ad format.
# The variable is again called `adata`, as in the previous analysis steps.
save_file = '/home/jana/scanpy_qc_filtered_pbmcs_for_sarcoid.h5ad'
adata=sc.read_h5ad(save_file)
# Display a summary of the loaded adata object
print(adata)
AnnData object with n_obs × n_vars = 67346 × 26113
obs: 'type', 'sample', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'percent_mt2', 'n_counts', 'n_genes', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'leiden', 'leiden_1.0', 'leiden_0.7', 'leiden_0.8', 'initial_annotation'
var: 'gene_ids', 'feature_types', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
uns: 'dendrogram_leiden_0.7', 'doublet_info_colors', 'hvg', 'initial_annotation_colors', 'leiden', 'leiden_0.7_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'rank_genes_groups', 'sample_colors', 'umap'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
obsp: 'connectivities', 'distances'
# Per-cell metadata (the .obs table) of the loaded adata object
adata.obs
| type | sample | batch | n_genes_by_counts | total_counts | total_counts_mt | pct_counts_mt | total_counts_ribo | pct_counts_ribo | total_counts_hb | ... | n_counts | n_genes | doublet_scores | predicted_doublets | doublet_info | leiden | leiden_1.0 | leiden_0.7 | leiden_0.8 | initial_annotation | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AAACCCAAGACATAAC-1-0 | Sarc | Sarc-1 | 0 | 385 | 585.0 | 27.0 | 4.615385 | 32.0 | 5.470086 | 1.0 | ... | 585.0 | 385 | 0.027344 | False | False | 29 | 22 | 5 | 19 | CD14+ Mono |
| AAACCCAAGAGGCGGA-1-0 | Sarc | Sarc-1 | 0 | 2191 | 5556.0 | 423.0 | 7.613391 | 613.0 | 11.033117 | 2.0 | ... | 5556.0 | 2191 | 0.064067 | False | False | 10 | 7 | 6 | 7 | CD14+ Mono |
| AAACCCAAGCGTACAG-1-0 | Sarc | Sarc-1 | 0 | 936 | 2864.0 | 253.0 | 8.833798 | 1131.0 | 39.490223 | 0.0 | ... | 2864.0 | 936 | 0.021978 | False | False | 5 | 5 | 2 | 2 | B cell |
| AAACCCAAGGTACAAT-1-0 | Sarc | Sarc-1 | 0 | 3622 | 11581.0 | 736.0 | 6.355237 | 1679.0 | 14.497885 | 2.0 | ... | 11581.0 | 3622 | 0.089888 | False | False | 8 | 3 | 0 | 3 | CD14+ Mono |
| AAACCCACAGCGTACC-1-0 | Sarc | Sarc-1 | 0 | 2219 | 6849.0 | 536.0 | 7.825960 | 1114.0 | 16.265148 | 0.0 | ... | 6849.0 | 2219 | 0.016505 | False | False | 13 | 10 | 9 | 10 | CD16 Mono |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| TTTGTTGGTTCAAGGG-1-5 | healthy | healthy-3 | 5 | 1541 | 5318.0 | 271.0 | 5.095901 | 2216.0 | 41.669800 | 1.0 | ... | 5318.0 | 1541 | 0.036342 | False | False | 11 | 0 | 1 | 1 | Naive CD4T |
| TTTGTTGTCACCTGGG-1-5 | healthy | healthy-3 | 5 | 1880 | 5073.0 | 345.0 | 6.800710 | 1458.0 | 28.740391 | 0.0 | ... | 5073.0 | 1880 | 0.073171 | False | False | 15 | 6 | 7 | 6 | Mixed |
| TTTGTTGTCATTGAGC-1-5 | healthy | healthy-3 | 5 | 1696 | 6433.0 | 331.0 | 5.145344 | 2995.0 | 46.556816 | 0.0 | ... | 6433.0 | 1696 | 0.064109 | False | False | 4 | 0 | 1 | 1 | Naive CD4T |
| TTTGTTGTCCGATGTA-1-5 | healthy | healthy-3 | 5 | 3787 | 12527.0 | 777.0 | 6.202602 | 1858.0 | 14.831964 | 0.0 | ... | 12527.0 | 3787 | 0.074398 | False | False | 8 | 3 | 0 | 3 | CD14+ Mono |
| TTTGTTGTCGTGGCTG-1-5 | healthy | healthy-3 | 5 | 1639 | 4419.0 | 373.0 | 8.440824 | 950.0 | 21.498077 | 0.0 | ... | 4419.0 | 1639 | 0.012614 | False | False | 7 | 6 | 7 | 6 | Mixed |
67346 rows × 22 columns
# Display the Leiden clustering at the chosen resolution of 0.7 on the UMAP,
# with the cluster labels drawn on top of the data points
sc.pl.umap(adata, color="leiden_0.7", use_raw=False, legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
# Re-clustering of cluster 2 (temporary annotation: B cell).
# Isolate cluster 2 of the 'leiden_0.7' clustering as cluster2_adata.
# An explicit copy is taken so the subset is an independent AnnData object
# rather than a view of `adata`: the following steps (leiden, neighbors, umap)
# modify the subset in place, which on a view triggers an implicit conversion
# and an ImplicitModificationWarning.
cluster2_adata = adata[adata.obs['leiden_0.7'].isin(['2']), :].copy()
# Inspect the isolated cluster2_adata object
print(cluster2_adata)
View of AnnData object with n_obs × n_vars = 7217 × 26113
obs: 'type', 'sample', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'percent_mt2', 'n_counts', 'n_genes', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'leiden', 'leiden_1.0', 'leiden_0.7', 'leiden_0.8', 'initial_annotation'
var: 'gene_ids', 'feature_types', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
uns: 'dendrogram_leiden_0.7', 'doublet_info_colors', 'hvg', 'initial_annotation_colors', 'leiden', 'leiden_0.7_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'log1p', 'neighbors', 'pca', 'rank_genes_groups', 'sample_colors', 'umap'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
obsp: 'connectivities', 'distances'
# Scan a series of Leiden resolution parameters and score each clustering.
# Like various algorithms, Leiden has a parameter named the resolution, which
# controls the coarseness of the clustering: higher values lead to more clusters.
# The silhouette coefficient (silhouette score) is a metric for the goodness of
# a clustering, with -1 <= silhouette score <= 1.
from sklearn.metrics import silhouette_score

# Resolution parameters to evaluate
#resolutions = [round(r, 2) for r in [.05] + list(np.linspace(.1, 1.6, 16))]
resolutions = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0, 1.2]
# Silhouette score per resolution, kept so the scan can be compared afterwards
silhouette_scores = {}

# Print a message indicating the start of the computation
print("Computing silhouette scores with different resolution parameters")
# NOTE(review): at this point the subset still carries the neighbor graph and
# the 'X_umap' embedding stored with the loaded object; both are only recomputed
# for the subset in a later cell. Confirm that scoring on them is intended.
for resolution in resolutions:
    # Apply Leiden clustering with the current resolution; every run overwrites
    # the 'leiden' column of cluster2_adata.obs
    sc.tl.leiden(cluster2_adata, resolution=resolution)
    labels = cluster2_adata.obs['leiden']
    # silhouette_score needs at least two distinct labels; skip degenerate runs
    # instead of aborting the whole scan
    if labels.nunique() < 2:
        print(f"Silhouette score for resolution {resolution}: undefined (single cluster)")
        continue
    # Compute the silhouette score of the clustering on the UMAP coordinates
    silhouette2 = silhouette_score(cluster2_adata.obsm['X_umap'], labels)
    silhouette_scores[resolution] = silhouette2
    # Print the silhouette score for the current resolution parameter
    print(f"Silhouette score for resolution {resolution}: {silhouette2}")
Computing silhouette scores with different resolution parameters running Leiden clustering
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/tools/_leiden.py:158: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.
finished: found 2 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:01)
Silhouette score for resolution 0.1: 0.3733758330345154
running Leiden clustering
finished: found 3 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.2: 0.4038565754890442
running Leiden clustering
finished: found 4 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.3: 0.2567930221557617
running Leiden clustering
finished: found 6 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.4: 0.17308494448661804
running Leiden clustering
finished: found 8 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.5: 0.07684057950973511
running Leiden clustering
finished: found 9 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.6: 0.1091974675655365
running Leiden clustering
finished: found 9 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 0.7: 0.11528532952070236
running Leiden clustering
finished: found 10 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:01)
Silhouette score for resolution 0.8: 0.11307550221681595
running Leiden clustering
finished: found 14 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:01)
Silhouette score for resolution 1.0: 0.04247363656759262
running Leiden clustering
finished: found 17 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
Silhouette score for resolution 1.2: 0.0004957361961714923
# Re-clustering of cluster 2: rebuild the neighborhood graph on the subset
# (10 neighbors, 50 principal components), run Leiden clustering with a
# resolution of 0.1, then recompute the UMAP embedding for the subset.
sc.pp.neighbors(cluster2_adata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(cluster2_adata, resolution=0.1)
sc.tl.umap(cluster2_adata)
computing neighbors
using 'X_pca' with n_pcs = 50
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:02)
running Leiden clustering
finished: found 2 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:24)
# Cluster 2: expression of selected B-cell marker genes in UMAP space,
# shown next to the new 'leiden' sub-clusters
sc.pl.umap(cluster2_adata, color = ['leiden','CD27','CD24','BCL6','CD40','CD38','CD74','AIM2','TCL1A', 'RPL18A','PRDM1'], wspace = 0.2, legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
# After inspecting the expression changes of different marker genes,
# build a temporary table of the manual annotations (first row is the header)
from tabulate import tabulate
bcell_table = [
['Cluster no', 'Type of Cell'],
['0', 'B-Naive'],
['1', 'B Memory'],
]
# Print the table as plain text
print(tabulate(bcell_table))
---------- ------------ Cluster no Type of Cell 0 B-Naive 1 B Memory ---------- ------------
# B-cell marker gene lists provided by the AZIMUTH tool
# (B_intermediate is defined here but not plotted in this cell)
B_intermediate = ['MS4A1', 'TNFRSF13B', 'IGHM', 'IGHD', 'AIM2', 'CD79A', 'LINC01857', 'RALGPS2', 'BANK1', 'CD79B']
B_memory = ['MS4A1', 'COCH', 'AIM2', 'BANK1', 'SSPN', 'CD79A', 'TEX9', 'RALGPS2', 'TNFRSF13C', 'LINC01781']
B_naive = ['IGHM', 'IGHD', 'CD79A', 'IL4R', 'MS4A1', 'CXCR4', 'BTG1', 'TCL1A', 'CD79B', 'YBX3']
# Dotplot of the B-memory markers across all 'leiden_0.7' clusters of the full dataset
sc.pl.dotplot(adata, B_memory, groupby='leiden_0.7', dendrogram=True)
print ("--------")
# Dotplot of the B-memory markers across the sub-clusters of cluster 2
sc.pl.dotplot(cluster2_adata, B_memory, groupby='leiden', dendrogram=True)
print ("--------")
# Dotplot of the B-naive markers across the sub-clusters of cluster 2
sc.pl.dotplot(cluster2_adata, B_naive, groupby='leiden', dendrogram=True)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
-------- WARNING: Dendrogram not added. Dendrogram is added only when the number of categories to plot > 2
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
-------- WARNING: Dendrogram not added. Dendrogram is added only when the number of categories to plot > 2
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
# Annotation of the re-clustered cluster 2: one label per 'leiden' sub-cluster.
# NOTE(review): presumably the i-th label is assigned to sub-cluster i — confirm
# against the besca annotate_cells_clustering documentation.
new_2_cluster_names = ['B_naive', # sub-cluster 0
'B_memory', # sub-cluster 1
]
# Store the labels in the new obs column 'initial_annotation_2'
bc.tl.annotate_cells_clustering(adata=cluster2_adata, clustering_label='leiden', new_annotation_label='initial_annotation_2', new_cluster_labels=new_2_cluster_names)
# UMAP of the re-clustered cluster 2, colored by sub-cluster and by the new annotation
sc.pl.umap(cluster2_adata, color = ['leiden','initial_annotation_2'], legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
# Re-clustering of cluster 17 (temporary annotation: mixed cells).
# Isolate cluster 17 of the 'leiden_0.7' clustering as cluster17_adata.
# An explicit copy is taken so the subset is an independent AnnData object
# instead of a view of `adata` that gets converted implicitly on the first
# in-place modification.
cluster17_adata = adata[adata.obs['leiden_0.7'].isin(['17']), :].copy()
# Rebuild the neighborhood graph on the subset (10 neighbors, 50 PCs), run
# Leiden clustering with a resolution of 0.1 and recompute the UMAP embedding
sc.pp.neighbors(cluster17_adata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(cluster17_adata, resolution=0.1)
sc.tl.umap(cluster17_adata)
computing neighbors
using 'X_pca' with n_pcs = 50
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
running Leiden clustering
finished: found 4 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:01)
# Find marker genes among the sub-clusters of cluster 17 (mixed).
# The loaded data is already log-transformed (its .uns carries a 'log1p'
# entry), and sc.pp.log1p only warns in that case before transforming again.
# An unconditional call would therefore log-transform the matrix a second time,
# so only transform when no earlier log1p step is recorded.
if 'log1p' in cluster17_adata.uns:
    # NOTE(review): some scanpy releases read uns['log1p']['base'] inside
    # rank_genes_groups, and that key can be missing after an h5ad round trip.
    # Make sure it exists (None = natural logarithm); confirm against the
    # installed scanpy version.
    cluster17_adata.uns['log1p'].setdefault('base', None)
else:
    sc.pp.log1p(cluster17_adata)
# Rank marker genes per sub-cluster using the Wilcoxon rank-sum method
sc.tl.rank_genes_groups(cluster17_adata, 'leiden', method='wilcoxon')
# Show the top 25 scoring genes of each sub-cluster
sc.pl.rank_genes_groups(cluster17_adata, n_genes=25, sharey=False)
WARNING: adata.X seems to be already log-transformed.
ranking genes
finished: added to `.uns['rank_genes_groups']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
# dnT marker genes (provided by Azimuth) checked inside the re-clustered cluster 17 (mixed)
sc.pl.umap(cluster17_adata, color = ['leiden','PTPN3', 'MIR4422HG', 'NUCB2', 'CAV1', 'DTHD1', 'GZMA', 'MYB', 'FXYD2', 'GZMK', 'AC004585.1'], legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
# Display a table of the top 5 ranked genes for each sub-cluster of the
# re-clustered cluster 17 (one column per sub-cluster)
pd.DataFrame(cluster17_adata.uns['rank_genes_groups']['names']).head(5)
| 0 | 1 | 2 | 3 | |
|---|---|---|---|---|
| 0 | HMGB2 | MZB1 | NKG7 | GZMK |
| 1 | HMGN2 | UBE2J1 | GZMB | TMSB4X |
| 2 | HMGB1 | TNFRSF17 | CTSW | AC004585.1 |
| 3 | H2AFZ | TXNDC5 | GNLY | GPR183 |
| 4 | MKI67 | JCHAIN | PRF1 | TCF7 |
# NK-proliferating marker genes checked inside the re-clustered cluster 17
sc.pl.umap(cluster17_adata, color = ['leiden','MKI67', 'KLRF1', 'TYMS', 'TRDC', 'TOP2A', 'FCER1G', 'PCLAF', 'CD247', 'CLSPN', 'ASPM'], legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
# Plasmablast marker genes checked inside the re-clustered cluster 17.
# Reference list kept for the record; its last entry is not plotted below.
#Plasmablast = ['IGHA2', 'MZB1', 'TNFRSF17', 'DERL3', 'TXNDC5', 'TNFRSF13B', 'POU2AF1', 'CPNE5', 'HNT5DC2']
sc.pl.umap(cluster17_adata, color = ['leiden','IGHA2', 'MZB1', 'TNFRSF17', 'DERL3', 'TXNDC5', 'TNFRSF13B', 'POU2AF1', 'CPNE5'], legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
# New annotation of the re-clustered cluster 17: one label per 'leiden' sub-cluster.
# NOTE(review): presumably the i-th label is assigned to sub-cluster i — confirm
# against the besca annotate_cells_clustering documentation.
new_17_cluster_names = ['NK_proli', # sub-cluster 0
'Plasmablast', # sub-cluster 1
'NK', # sub-cluster 2
'dnT', # sub-cluster 3
]
# Store the labels in the new obs column 'initial_annotation_17'
bc.tl.annotate_cells_clustering(adata=cluster17_adata, clustering_label='leiden', new_annotation_label='initial_annotation_17', new_cluster_labels=new_17_cluster_names)
# UMAP of the re-clustered cluster 17, colored by sub-cluster and by the new annotation
sc.pl.umap(cluster17_adata, color = ['leiden','initial_annotation_17'], legend_loc="on data")
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
# Re-clustering of cluster 7 (temporary annotation: mixed cells).
# Isolate cluster 7 of the 'leiden_0.7' clustering as cluster7_adata.
# An explicit copy is taken so the subset is an independent AnnData object
# instead of a view of `adata` that gets converted implicitly on the first
# in-place modification.
cluster7_adata = adata[adata.obs['leiden_0.7'].isin(['7']), :].copy()
# Rebuild the neighborhood graph on the subset (10 neighbors, 50 PCs), run
# Leiden clustering with a resolution of 0.3 and recompute the UMAP embedding
sc.pp.neighbors(cluster7_adata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(cluster7_adata, resolution=0.3)
sc.tl.umap(cluster7_adata)
computing neighbors
using 'X_pca' with n_pcs = 50
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:02)
running Leiden clustering
finished: found 3 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:17)
# Expression of selected marker genes inside the re-clustered cluster 7,
# shown next to the new 'leiden' sub-clusters
sc.pl.umap(cluster7_adata, color = ['leiden','IL32','IL7R','TRAC', 'TRDC','TRBC1','TRGC1','CD8B', 'S100B', 'CCR7', 'CD8A','CD4', 'GNLY', 'TYROBP', 'NKG7'], legend_loc="on data", wspace = 0.2)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
# Find marker genes among the sub-clusters of cluster 7.
# The loaded data is already log-transformed (its .uns carries a 'log1p'
# entry), and sc.pp.log1p only warns in that case before transforming again.
# An unconditional call would therefore log-transform the matrix a second time,
# so only transform when no earlier log1p step is recorded.
if 'log1p' in cluster7_adata.uns:
    # NOTE(review): some scanpy releases read uns['log1p']['base'] inside
    # rank_genes_groups, and that key can be missing after an h5ad round trip.
    # Make sure it exists (None = natural logarithm); confirm against the
    # installed scanpy version.
    cluster7_adata.uns['log1p'].setdefault('base', None)
else:
    sc.pp.log1p(cluster7_adata)
# Rank marker genes per sub-cluster using the Wilcoxon rank-sum method
sc.tl.rank_genes_groups(cluster7_adata, 'leiden', method='wilcoxon')
# Show the top 25 scoring genes of each sub-cluster
sc.pl.rank_genes_groups(cluster7_adata, n_genes=25, sharey=False)
WARNING: adata.X seems to be already log-transformed.
ranking genes
finished: added to `.uns['rank_genes_groups']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:16)
# Display a table of the top 5 ranked genes for each Leiden sub-cluster of
# cluster 7 (one column per sub-cluster)
pd.DataFrame(cluster7_adata.uns['rank_genes_groups']['names']).head(5)
| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | NKG7 | IL7R | S100A4 |
| 1 | GNLY | GZMK | CD52 |
| 2 | GZMB | RPL13 | B2M |
| 3 | KLRD1 | LTB | SH3BGRL3 |
| 4 | GZMH | EEF1A1 | ITGB1 |
# Re-clustering of sub-cluster 0 of cluster 7 (temporary annotation: mixed cells).
# Isolate sub-cluster 0 of the re-clustered cluster 7 as cluster7_0_adata.
# An explicit copy is taken so the subset is an independent AnnData object
# instead of a view of cluster7_adata that gets converted implicitly on the
# first in-place modification.
cluster7_0_adata = cluster7_adata[cluster7_adata.obs['leiden'].isin(['0']), :].copy()
# Display the isolated object
cluster7_0_adata
View of AnnData object with n_obs × n_vars = 3214 × 26113
obs: 'type', 'sample', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'pct_counts_hb', 'percent_mt2', 'n_counts', 'n_genes', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'leiden', 'leiden_1.0', 'leiden_0.7', 'leiden_0.8', 'initial_annotation'
var: 'gene_ids', 'feature_types', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
uns: 'dendrogram_leiden_0.7', 'doublet_info_colors', 'hvg', 'initial_annotation_colors', 'leiden', 'leiden_0.7_colors', 'leiden_0.8_colors', 'leiden_1.0_colors', 'log1p', 'neighbors', 'pca', 'rank_genes_groups', 'sample_colors', 'umap', 'leiden_colors'
obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
obsp: 'connectivities', 'distances'
# Sub-cluster 0 isolated from the re-clustered cluster 7, before its own
# re-clustering: inherited 'leiden' labels and NKG7 expression
sc.pl.umap(cluster7_0_adata, color = ['leiden','NKG7'], legend_loc="on data", wspace = 0.2)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
# Re-clustering of sub-cluster 0 of cluster 7: rebuild the neighborhood graph
# on the subset (10 neighbors, 50 principal components), run Leiden clustering
# with a resolution of 0.4, then recompute the UMAP embedding
sc.pp.neighbors(cluster7_0_adata, n_neighbors=10, n_pcs=50)
sc.tl.leiden(cluster7_0_adata, resolution=0.4)
sc.tl.umap(cluster7_0_adata)
computing neighbors
using 'X_pca' with n_pcs = 50
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
running Leiden clustering
finished: found 3 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:11)
# Sub-cluster 0 of cluster 7 after its own re-clustering: new 'leiden' labels
# and NKG7 expression on the recomputed UMAP
sc.pl.umap(cluster7_0_adata, color = ['leiden','NKG7'], legend_loc="on data", wspace = 0.2)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
# Find marker genes among the sub-clusters of cluster7_0.
# The data this subset was taken from is already log-transformed (its .uns
# carries a 'log1p' entry), and sc.pp.log1p only warns in that case before
# transforming again. An unconditional call would therefore log-transform the
# matrix once more, so only transform when no earlier log1p step is recorded.
if 'log1p' in cluster7_0_adata.uns:
    # NOTE(review): some scanpy releases read uns['log1p']['base'] inside
    # rank_genes_groups, and that key can be missing after an h5ad round trip.
    # Make sure it exists (None = natural logarithm); confirm against the
    # installed scanpy version.
    cluster7_0_adata.uns['log1p'].setdefault('base', None)
else:
    sc.pp.log1p(cluster7_0_adata)
# Rank marker genes per sub-cluster using the Wilcoxon rank-sum method
sc.tl.rank_genes_groups(cluster7_0_adata, 'leiden', method='wilcoxon')
# Show the top 25 scoring genes of each sub-cluster
sc.pl.rank_genes_groups(cluster7_0_adata, n_genes=25, sharey=False)
WARNING: adata.X seems to be already log-transformed.
ranking genes
finished: added to `.uns['rank_genes_groups']`
'names', sorted np.recarray to be indexed by group ids
'scores', sorted np.recarray to be indexed by group ids
'logfoldchanges', sorted np.recarray to be indexed by group ids
'pvals', sorted np.recarray to be indexed by group ids
'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:09)
# Display a table of the top 5 ranked genes for each Leiden sub-cluster of
# cluster7_0 (one column per sub-cluster)
pd.DataFrame(cluster7_0_adata.uns['rank_genes_groups']['names']).head(5)
| 0 | 1 | 2 | |
|---|---|---|---|
| 0 | GNLY | CD8A | TRGC1 |
| 1 | TYROBP | CD8B | TRDC |
| 2 | KLRC3 | TRBC2 | KLRC1 |
| 3 | KLRC2 | TRAC | KLRB1 |
| 4 | KLRF1 | THEMIS | CD247 |
# Expression of selected marker genes inside sub-cluster 0 of cluster 7 (cluster7_0)
sc.pl.umap(cluster7_0_adata, color = ['leiden','IL32','IL7R','TRAC', 'TRDC','TRBC1','TRGC1','CD8B', 'S100B', 'CCR7', 'CD8A','CD4', 'GNLY', 'TYROBP', 'NKG7'], legend_loc="on data", wspace = 0.2)
/home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:163: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. /home/jana/my-notebook-venv/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored